// New parent distribution: mean and SD of parent human capital from the 2000 Census.
// Load the Census sample.
use "$temp/census_p25_sample", clear



**** Normalize earnings by the mean stored in wage_norm
// The constant is read from a separate file; preserve/restore keeps the
// Census data in memory untouched while that file is open.
preserve
	use "$temp/wage_norm", clear
	summarize mean
	local norm `r(mean)'
	// Stop here if the constant is unusable: dividing by a missing or
	// non-positive value would silently corrupt incwage for every
	// downstream step instead of raising an error.
	if r(N) == 0 | missing(r(mean)) | r(mean) <= 0 {
		display as error "wage_norm: mean earnings is missing or non-positive; cannot normalize incwage"
		exit 459
	}
restore

replace incwage = incwage/`norm' // normalize by mean PSID earnings (original author's description of wage_norm)


// Weighted summary of normalized earnings for ages 18-36, relate == 1,
// shown separately for coll == 0 and coll != 0.
summarize incwage [fw=perwt] if relate == 1 & age>=18 & age<=36 & !coll
summarize incwage [fw=perwt] if relate == 1 & age>=18 & age<=36 & coll


*** Sample restrictions (the head/spouse human-capital correlation is checked further below)
// keep if relate == 1 // restriction to household heads/men (disabled)
keep if inrange(age, 36, 54) // age restriction, applied to every person row still in memory
keep if nchild > 0
// keep if nchild == 1 (disabled)
summarize incwage, detail
// Retain only relate codes 1 and 2; the rest of this script treats them as
// head and spouse respectively.
keep if inlist(relate, 1, 2)



**** Normalize income by state skill prices
rename statefip statefips
// Attach state-level skill prices; rows without a matching state are dropped.
merge m:1 statefips using "$temp/skill_prices_all", keep(match) nogen

// Set the 2010 prices to the 2000 price shifted down by a fixed log offset
// for racetype 2 (0.145) and racetype 3 (0.087), for both price series.
// NOTE(review): these 2010 variables are not referenced again in this script;
// the normalization below uses the 2000 prices and the later keep drops them.
// Confirm whether the adjustment was meant to feed the normalization.
foreach stem in skill_price skill_price_coll {
	replace `stem'_2010 = exp(log(`stem'_2000) - 0.145) if racetype == 2
	replace `stem'_2010 = exp(log(`stem'_2000) - 0.087) if racetype == 3
}


// Divide earnings by the 2000 skill price matching the person's coll status.
replace incwage = incwage/skill_price_2000 if !coll
replace incwage = incwage/skill_price_coll_2000 if coll

summarize incwage if !coll & relate==1 & sex==1, detail
summarize incwage if coll & relate==1 & sex==1, detail // author's note: looks different from PSID -- why?

*** After all this, earnings map 1-1 to parent HC (author's note: will have to move to wages down the road)
// Scale earnings to 2080 hours. Rows with hours == 0 become missing because
// Stata returns missing for division by zero.
replace incwage = incwage * (2080/hours)
rename incwage hc
summarize hc [fw=perwt], detail
// Trim values above 6; rows with missing hc are kept. This applies to every
// remaining row, spouses included.
drop if hc>6 & hc!=.
summarize hc [fw=perwt] if !coll & relate==1 & sex==1, detail
summarize hc [fw=perwt] if coll & relate==1 & sex==1, detail // author's note: now more reasonable!
summarize hc if relate==1, detail // for initial condition

// Correlation of spousal human capital and education
keep if inlist(relate, 1, 2)

// Indicator for zero hours, used below to flag a non-working spouse.
generate nilf = (hours==0)

// Copy one member's attribute to every row of the household (serial).
// Each spec is "<new variable> <source variable> <relate code>": the source is
// blanked out for other members and the household maximum is taken.
// A household with no row of the given relate code (for example because the
// earlier age or hc>6 filters removed it) ends up with a missing value.
foreach spec in "headcoll coll 1" "wifecoll coll 2" "headhc hc 1" "wifehc hc 2" "wife_nowork nilf 2" {
	tokenize `spec'
	generate temp = `2' if relate==`3'
	bysort serial: egen `1' = max(temp)
	drop temp
}

// Partner's college status and human capital, seen from each row's own side.
generate spouse_coll = cond(relate==1, wifecoll, cond(relate==2, headcoll, .))
generate spouse_hc = cond(relate==1, wifehc, cond(relate==2, headhc, .))

// Checks
correlate hc spouse_hc if spouse_hc!=. & relate==1 // author's note: way less correlated than I thought; not that it matters all that much.
tabulate coll spouse_coll if relate==1

// For now sticking with assortative mating: continue with head rows only.
keep if relate == 1
generate married = (marst==1)
// Rows where flag_drop is nonzero are removed; a missing flag_drop also counts
// as nonzero in Stata and is removed too.
drop if flag_drop
rename wife_nowork spouse_nowork

// Probabilities of each type and distributions of HC
keep married headcoll spouse_coll headhc spouse_hc spouse_nowork perwt statefips racetype

// Household types, in two banks of four by the head's college status
// (1-4: head without college, 5-8: head with college):
//   +1 not married
//   +2 married, spouse not working
//   +3 married, spouse working, spouse without college
//   +4 married, spouse working, spouse with college
// NOTE(review): Stata treats a missing value as true in an if-condition. A
// married head with missing spouse_nowork therefore lands in the "spouse not
// working" type (2/6), and one with spouse_nowork == 0 but missing spouse_coll
// lands in the "spouse with college" type (4/8). Confirm this is intended.
generate count = 1
generate type = 1
forvalues c = 0/1 {
	local head = cond(`c', "headcoll", "!headcoll")
	local base = 4*`c'
	replace type = `base' + 1 if `head' & !married
	replace type = `base' + 2 if `head' & married & spouse_nowork
	replace type = `base' + 3 if `head' & married & !spouse_nowork & !spouse_coll
	replace type = `base' + 4 if `head' & married & !spouse_nowork & spouse_coll
}

// One 0/1 indicator per racetype code 1-3.
forvalues r = 1/3 {
	generate flag_race_`r' = (racetype == `r')
}

// State-by-type distribution. Built inside preserve/restore so the head-level
// data is available again afterwards.
preserve
// Weighted by perwt within each (type, statefips) cell: mean of the flag*
// indicators, weighted observation count, and mean/SD of head and spouse HC.
collapse (mean) flag* (sum) count (mean) hc_mean = headhc spouse_hc_mean = spouse_hc (sd) hc_sd = headhc spouse_hc_sd = spouse_hc [fw=perwt], by(type statefips)

// Share of each type within its state.
bysort statefips: egen totalcount = total(count)
generate frac = count/totalcount
drop count totalcount
sort statefips type

// Log-normal parameters matched to the level mean and SD.
// NOTE(review): the *_sd_log variables hold log(1 + sd^2/mean^2), which is the
// squared log-scale parameter (sigma^2), not its square root. Confirm that the
// consumer of the CSV expects a variance here.
foreach pair in "hc hc" "spouse_hc sp_hc" {
	local src : word 1 of `pair'
	local out : word 2 of `pair'
	generate `out'_mean_log = log(`src'_mean^2 / sqrt(`src'_mean^2 + `src'_sd^2))
	generate `out'_sd_log = log(1 + (`src'_sd^2)/(`src'_mean^2))
}

keep statefips type frac hc_mean_log hc_sd_log sp_hc_mean_log sp_hc_sd_log flag*
order statefips type frac hc_mean_log hc_sd_log sp_hc_mean_log sp_hc_sd_log flag*
// Headerless CSV; columns follow the order set above.
export delimited "$model/utilities/state_parent_distributions.csv", novarn replace nolabel
// The saved .dta additionally carries a row id that is not in the CSV.
generate uniqid = _n
save "$temp/state_parent_distributions", replace


restore



// For the decomposition exercise: the pooled (all-state) type distribution,
// replicated once per state.
// Record the list of states before the collapse removes statefips.
levelsof statefips, clean local(fips)
collapse (mean) flag* (sum) count (mean) hc_mean = headhc spouse_hc_mean = spouse_hc (sd) hc_sd = headhc spouse_hc_sd = spouse_hc [fw=perwt], by(type)

// Share of each type in the pooled sample.
egen totalcount = total(count)
generate frac = count/totalcount
drop count totalcount
sort type

// Log-normal parameters matched to the level mean and SD.
// NOTE(review): the *_sd_log variables hold log(1 + sd^2/mean^2), which is the
// squared log-scale parameter (sigma^2), not its square root. Confirm that the
// consumer of the CSV expects a variance here.
foreach pair in "hc hc" "spouse_hc sp_hc" {
	local src : word 1 of `pair'
	local out : word 2 of `pair'
	generate `out'_mean_log = log(`src'_mean^2 / sqrt(`src'_mean^2 + `src'_sd^2))
	generate `out'_sd_log = log(1 + (`src'_sd^2)/(`src'_mean^2))
}

// Write one copy of the pooled table per state, stamping the state code on
// every row before each save.
generate statefips = .
foreach fip of local fips {
	replace statefips = `fip'
	save "$temp/parent_dist_decomp_`fip'", replace
}

// Stack the per-state copies in the order given by levelsof.
clear
foreach fip of local fips {
	append using "$temp/parent_dist_decomp_`fip'"
}


keep statefips type frac hc_mean_log hc_sd_log sp_hc_mean_log sp_hc_sd_log flag*
order statefips type frac hc_mean_log hc_sd_log sp_hc_mean_log sp_hc_sd_log flag*
// Headerless CSV; columns follow the order set above.
export delimited "$model/utilities/state_parent_distributions_decomp_d.csv", novarn replace



